November 09, 2015
http://aqsdr1.epa.gov/aqsweb/aqstmp/airdata/download_files.html
f <- "http://aqsdr1.epa.gov/aqsweb/aqstmp/airdata/annual_all_2015.zip" tmp <- tempfile() download.file(f, tmp) unzip(tmp, "annual_all_2015.csv")
library(data.table)
dat <- fread("annual_all_2015.csv")
dat
## State Code County Code Site Num Parameter Code POC Latitude ## 1: 01 003 0010 44201 1 30.49800 ## 2: 01 003 0010 44201 1 30.49800 ## 3: 01 003 0010 44201 1 30.49800 ## 4: 01 003 0010 68101 1 30.49800 ## 5: 01 003 0010 68102 1 30.49800 ## --- ## 43449: 80 002 0015 85101 1 32.58250 ## 43450: 80 002 0019 81102 1 32.60361 ## 43451: 80 002 0019 85101 1 32.60361 ## 43452: 80 002 0021 81102 1 32.52848 ## 43453: 80 002 0021 85101 1 32.52848 ## Longitude Datum Parameter Name Sample Duration ## 1: -87.88141 NAD83 Ozone 1 HOUR ## 2: -87.88141 NAD83 Ozone 8-HR RUN AVG BEGIN HOUR ## 3: -87.88141 NAD83 Ozone 8-HR RUN AVG BEGIN HOUR ## 4: -87.88141 NAD83 Sample Flow Rate- CV 24 HOUR ## 5: -87.88141 NAD83 Sample Volume 24 HOUR ## --- ## 43449: -115.57806 NAD27 PM10 - LC 24 HOUR ## 43450: -115.48610 NAD83 PM10 Total 0-10um STP 24 HOUR ## 43451: -115.48610 NAD83 PM10 - LC 24 HOUR ## 43452: -116.92164 NAD83 PM10 Total 0-10um STP 24 HOUR ## 43453: -116.92164 NAD83 PM10 - LC 24 HOUR ## Pollutant Standard ## 1: Ozone 1-hour Daily 2005 ## 2: Ozone 8-Hour 1997 ## 3: Ozone 8-Hour 2008 ## 4: ## 5: ## --- ## 43449: ## 43450: PM10 24-hour 2006 ## 43451: ## 43452: PM10 24-hour 2006 ## 43453: ## Metric Used ## 1: Daily maxima of observed hourly values (between 9:00 AM and 8:00 PM) ## 2: Daily maximum of 8 hour running average of observed hourly values ## 3: Daily maximum of 8 hour running average of observed hourly values ## 4: Observed Values ## 5: Observed Values ## --- ## 43449: Observed Values ## 43450: Daily Mean ## 43451: Observed Values ## 43452: Daily Mean ## 43453: Observed Values ## Method Name Year ## 1: INSTRUMENTAL - ULTRA VIOLET 2015 ## 2: 2015 ## 3: 2015 ## 4: R & P Model 2025 PM2.5 Sequent - Calculation 2015 ## 5: R & P Model 2025 PM2.5 Sequent - Calculation 2015 ## --- ## 43449: HI-VOL-SA/GMW1200 - GRAVIMETRIC 2015 ## 43450: HI-VOL SA/GMW-1200 - GRAVIMETRIC 2015 ## 43451: HI-VOL-SA/GMW1200 - GRAVIMETRIC 2015 ## 43452: HI-VOL SA/GMW-1200 - GRAVIMETRIC 2015 ## 43453: HI-VOL-SA/GMW1200 - GRAVIMETRIC 2015 ## Units of Measure Event Type Observation Count ## 1: Parts per million No Events 705 ## 2: Parts per million No Events 733 ## 3: Parts per million No Events 733 ## 4: Percent No Events 28 ## 5: Cubic meter No Events 28 ## --- ## 43449: Micrograms/cubic meter (LC) No Events 8 ## 43450: Micrograms/cubic meter (25 C) No Events 8 ## 43451: Micrograms/cubic meter (LC) No Events 8 ## 43452: Micrograms/cubic meter (25 C) No Events 6 ## 43453: Micrograms/cubic meter (LC) No Events 6 ## Observation Percent Completeness Indicator Valid Day Count ## 1: 13 N 31 ## 2: 13 N 31 ## 3: 13 N 31 ## 4: 47 N 14 ## 5: 47 N 14 ## --- ## 43449: 13 N 8 ## 43450: 13 N 8 ## 43451: 13 N 8 ## 43452: 10 N 6 ## 43453: 10 N 6 ## Required Day Count Exceptional Data Count Null Data Count ## 1: 245 0 39 ## 2: 245 0 0 ## 3: 245 0 0 ## 4: 60 0 0 ## 5: 60 0 0 ## --- ## 43449: 60 0 7 ## 43450: 60 0 7 ## 43451: 60 0 7 ## 43452: 60 0 9 ## 43453: 60 0 9 ## Primary Exceedance Count Secondary Exceedance Count ## 1: 0 0 ## 2: 0 0 ## 3: 0 0 ## 4: ## 5: ## --- ## 43449: ## 43450: 1 1 ## 43451: ## 43452: 1 1 ## 43453: ## Certification Indicator Num Obs Below MDL Arithmetic Mean ## 1: Certification not required 8 0.041290 ## 2: Certification not required 0 0.036871 ## 3: Certification not required 0 0.036871 ## 4: Certification not required 21 0.069643 ## 5: Certification not required 0 24.085714 ## --- ## 43449: Certification not required 0 71.125000 ## 43450: Certification not required 0 93.750000 ## 43451: Certification not required 0 95.625000 ## 43452: Certification not required 0 94.500000 ## 43453: Certification not required 0 96.833333 ## Arithmetic Standard Dev 1st Max Value 1st Max DateTime ## 1: 0.011719 0.064 2015-03-30 17:00 ## 2: 0.009760 0.056 2015-03-30 11:00 ## 3: 0.009760 0.056 2015-03-30 11:00 ## 4: 0.041587 0.200 2015-02-26 00:00 ## 5: 0.035635 24.100 2015-01-03 00:00 ## --- ## 43449: 40.087182 128.000 2015-02-11 00:00 ## 43450: 61.381128 203.000 2015-02-05 00:00 ## 43451: 62.996457 209.000 2015-02-05 00:00 ## 43452: 103.583300 302.000 2015-03-13 00:00 ## 43453: 105.934728 309.000 2015-03-13 00:00 ## 2nd Max Value 2nd Max DateTime 3rd Max Value 3rd Max DateTime ## 1: 0.05900 2015-03-17 14:00 0.05800 2015-03-31 17:00 ## 2: 0.05100 2015-03-17 09:00 0.05000 2015-03-08 08:00 ## 3: 0.05100 2015-03-17 09:00 0.05000 2015-03-08 08:00 ## 4: 0.20000 2015-03-16 00:00 0.10000 2015-01-09 00:00 ## 5: 24.10000 2015-01-06 00:00 24.10000 2015-01-12 00:00 ## --- ## 43449: 116 2015-03-25 00:00 96 2015-02-17 00:00 ## 43450: 125 2015-03-07 00:00 125 2015-03-25 00:00 ## 43451: 127 2015-03-07 00:00 126 2015-03-25 00:00 ## 43452: 78 2015-03-07 00:00 66 2015-02-17 00:00 ## 43453: 79 2015-03-07 00:00 68 2015-02-17 00:00 ## 4th Max Value 4th Max DateTime 1st Max Non Overlapping Value ## 1: 0.05400 2015-03-07 15:00 ## 2: 0.04900 2015-03-07 10:00 ## 3: 0.04900 2015-03-07 10:00 ## 4: 0.10000 2015-01-18 00:00 ## 5: 24.10000 2015-01-15 00:00 ## --- ## 43449: 73 2015-03-31 00:00 ## 43450: 115 2015-02-11 00:00 ## 43451: 119 2015-02-11 00:00 ## 43452: 62 2015-02-23 00:00 ## 43453: 65 2015-02-23 00:00 ## 1st NO Max DateTime 2nd Max Non Overlapping Value ## 1: ## 2: ## 3: ## 4: ## 5: ## --- ## 43449: ## 43450: ## 43451: ## 43452: ## 43453: ## 2nd NO Max DateTime 99th Percentile 98th Percentile 95th Percentile ## 1: 0.064 0.064 0.059 ## 2: 0.056 0.056 0.051 ## 3: 0.056 0.056 0.051 ## 4: 0.200 0.200 0.200 ## 5: 24.100 24.100 24.100 ## --- ## 43449: 128.000 128.000 128.000 ## 43450: 203.000 203.000 203.000 ## 43451: 209.000 209.000 209.000 ## 43452: 302.000 302.000 302.000 ## 43453: 309.000 309.000 309.000 ## 90th Percentile 75th Percentile 50th Percentile 10th Percentile ## 1: 0.054 0.053 0.038 0.028 ## 2: 0.049 0.045 0.036 0.026 ## 3: 0.049 0.045 0.036 0.026 ## 4: 0.100 0.100 0.050 0.050 ## 5: 24.100 24.100 24.100 24.000 ## --- ## 43449: 128.000 116.000 73.000 14.000 ## 43450: 203.000 125.000 115.000 16.000 ## 43451: 209.000 127.000 119.000 17.000 ## 43452: 302.000 78.000 66.000 29.000 ## 43453: 309.000 79.000 68.000 30.000 ## Local Site Name ## 1: FAIRHOPE, Alabama ## 2: FAIRHOPE, Alabama ## 3: FAIRHOPE, Alabama ## 4: FAIRHOPE, Alabama ## 5: FAIRHOPE, Alabama ## --- ## 43449: PROGRESSO, 1 BLOCK NORTH OF HGWY 2 IN COLONIAL PROGRESSO, MEXICALI ## 43450: AT CESPM, COMISION ESTATAL DE SERVICIOS PUBLICOS DE MEXICALI ## 43451: AT CESPM, COMISION ESTATAL DE SERVICIOS PUBLICOS DE MEXICALI ## 43452: AT LABORATORIO TIJUANA, BCN ## 43453: AT LABORATORIO TIJUANA, BCN ## Address ## 1: FAIRHOPE HIGH SCHOOL, FAIRHOPE, ALABAMA ## 2: FAIRHOPE HIGH SCHOOL, FAIRHOPE, ALABAMA ## 3: FAIRHOPE HIGH SCHOOL, FAIRHOPE, ALABAMA ## 4: FAIRHOPE HIGH SCHOOL, FAIRHOPE, ALABAMA ## 5: FAIRHOPE HIGH SCHOOL, FAIRHOPE, ALABAMA ## --- ## 43449: PROGRESSO, CENTRO DE SALUD, MEXICALI ## 43450: CESPM, COMISION ESTATAL DE SERVICIOS PUBLICOS DE MEXICALI ## 43451: CESPM, COMISION ESTATAL DE SERVICIOS PUBLICOS DE MEXICALI ## 43452: Calle Dos Oriente S/N Esq. Con calle Nueve Sur Ciudad Industrial Nueva Tijuana cp 22454 ## 43453: Calle Dos Oriente S/N Esq. Con calle Nueve Sur Ciudad Industrial Nueva Tijuana cp 22454 ## State Name County Name City Name ## 1: Alabama Baldwin Fairhope ## 2: Alabama Baldwin Fairhope ## 3: Alabama Baldwin Fairhope ## 4: Alabama Baldwin Fairhope ## 5: Alabama Baldwin Fairhope ## --- ## 43449: Country Of Mexico BAJA CALIFORNIA NORTE Mexicali ## 43450: Country Of Mexico BAJA CALIFORNIA NORTE Mexicali ## 43451: Country Of Mexico BAJA CALIFORNIA NORTE Mexicali ## 43452: Country Of Mexico BAJA CALIFORNIA NORTE Tijuana ## 43453: Country Of Mexico BAJA CALIFORNIA NORTE Tijuana ## CBSA Name Date of Last Change ## 1: Daphne-Fairhope-Foley, AL 2015-05-19 ## 2: Daphne-Fairhope-Foley, AL 2015-05-19 ## 3: Daphne-Fairhope-Foley, AL 2015-05-19 ## 4: Daphne-Fairhope-Foley, AL 2015-05-19 ## 5: Daphne-Fairhope-Foley, AL 2015-05-19 ## --- ## 43449: 2015-05-28 ## 43450: 2015-05-28 ## 43451: 2015-05-28 ## 43452: 2015-05-28 ## 43453: 2015-05-28
Find patterns in filenames
years <- 2010:2015
files <- paste0("http://aqsdr1.epa.gov/aqsweb/aqstmp/airdata/annual_all_",
years, ".zip")
files
## [1] "http://aqsdr1.epa.gov/aqsweb/aqstmp/airdata/annual_all_2010.zip" ## [2] "http://aqsdr1.epa.gov/aqsweb/aqstmp/airdata/annual_all_2011.zip" ## [3] "http://aqsdr1.epa.gov/aqsweb/aqstmp/airdata/annual_all_2012.zip" ## [4] "http://aqsdr1.epa.gov/aqsweb/aqstmp/airdata/annual_all_2013.zip" ## [5] "http://aqsdr1.epa.gov/aqsweb/aqstmp/airdata/annual_all_2014.zip" ## [6] "http://aqsdr1.epa.gov/aqsweb/aqstmp/airdata/annual_all_2015.zip"
Create a small utility function
download_epa_aqs <- function(years) {
files <- paste0("http://aqsdr1.epa.gov/aqsweb/aqstmp/airdata/annual_all_",
years, ".zip")
for (i in seq(years)) {
tmp <- tempfile()
download.file(files[i], tmp)
unzip(tmp, paste0("annual_all_", years[i], ".csv"))
}
}
download_epa_aqs(2000:2002)
http://energyalmanac.ca.gov/electricity/web_qfer/Heat_Rates.php
library(XML) f <- "http://energyalmanac.ca.gov/electricity/web_qfer/Heat_Rates.php" d <- readHTMLTable(f, which = 1) head(d)
## Category v 2014 Â Company Name ## 1 C 2014 ACE Cogeneration Co ## 2 C 2014 Karnavati Holdings, Inc. ## 3 C 2014 Los Angeles Department of Water & Power (LADWP) ## 4 C 2014 Rio Bravo ## 5 C 2014 Rio Bravo ## 6 C 2014 Tesoro Refining & Marketing Company LLC ## Plant ID Plant Name ## 1 C0001 ACE Cogeneration (ACE is Argus Cogen Expansion - Retired) ## 2 C0017 Argus Cogen Plant ## 3 C0023 Intermountain Power Project (UT) ## 4 C0018 Rio Bravo Jasmin ## 5 C0022 Rio Bravo Poso ## 6 C0002 Los Angeles Refinery - Calciner ## Net MWh Main MMBTU Heat Rate ## 1 333205 4596520 13.7948710253448 ## 2 257374 13830280 53.7361194215422 ## 3 12369820 115061800 9.30181684131216 ## 4 72667.1015625 847439 11.6619347927498 ## 5 116102 1334220 11.4917917004014 ## 6 208442 3007930 14.4305370318842
https://en.wikipedia.org/wiki/List_of_highest-grossing_films
library(rvest)
## Loading required package: xml2 ## ## Attaching package: 'rvest' ## ## The following object is masked from 'package:XML': ## ## xml
d <- read_html("https://en.wikipedia.org/wiki/List_of_highest-grossing_films") %>%
html_node(".wikitable") %>%
html_table()
head(d)
## Rank Peak Title Worldwide gross Year Reference(s) ## 1 1 1 Avatar $2,787,965,087 2009 [# 1][# 2] ## 2 2 1 Titanic $2,186,772,302 1997 [# 3][# 4] ## 3 3 3 Jurassic World $1,666,050,585 2015 [# 5][# 6] ## 4 4 3 The Avengers $1,519,557,910 2012 [# 7][# 8] ## 5 5 4 Furious 7 $1,514,827,481 2015 [# 9][# 10] ## 6 6 5 Avengers: Age of Ultron $1,402,805,868 2015 [# 11][# 10]
read_html("http://www.imdb.com/title/tt0053779/") %>%
html_nodes("#titleCast .itemprop span") %>%
html_text()
## [1] "Marcello Mastroianni" "Anita Ekberg" "Anouk Aimée" ## [4] "Yvonne Furneaux" "Magali Noël" "Alain Cuny" ## [7] "Annibale Ninchi" "Walter Santesso" "Lex Barker" ## [10] "Jacques Sernas" "Nadia Gray" "Valeria Ciangottini" ## [13] "Riccardo Garrone" "Ida Galli" "Audrey McDonald"
import imdb access = imdb.IMDb() movie = access.get_movie(53779) print movie # La dolce vita
Application Programming Interface
Example: census data. Key required: http://api.census.gov/data/key_signup.html
http://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html
d <- fread("annual_all_2015.csv")
names(d)
## [1] "State Code" "County Code" ## [3] "Site Num" "Parameter Code" ## [5] "POC" "Latitude" ## [7] "Longitude" "Datum" ## [9] "Parameter Name" "Sample Duration" ## [11] "Pollutant Standard" "Metric Used" ## [13] "Method Name" "Year" ## [15] "Units of Measure" "Event Type" ## [17] "Observation Count" "Observation Percent" ## [19] "Completeness Indicator" "Valid Day Count" ## [21] "Required Day Count" "Exceptional Data Count" ## [23] "Null Data Count" "Primary Exceedance Count" ## [25] "Secondary Exceedance Count" "Certification Indicator" ## [27] "Num Obs Below MDL" "Arithmetic Mean" ## [29] "Arithmetic Standard Dev" "1st Max Value" ## [31] "1st Max DateTime" "2nd Max Value" ## [33] "2nd Max DateTime" "3rd Max Value" ## [35] "3rd Max DateTime" "4th Max Value" ## [37] "4th Max DateTime" "1st Max Non Overlapping Value" ## [39] "1st NO Max DateTime" "2nd Max Non Overlapping Value" ## [41] "2nd NO Max DateTime" "99th Percentile" ## [43] "98th Percentile" "95th Percentile" ## [45] "90th Percentile" "75th Percentile" ## [47] "50th Percentile" "10th Percentile" ## [49] "Local Site Name" "Address" ## [51] "State Name" "County Name" ## [53] "City Name" "CBSA Name" ## [55] "Date of Last Change"
d <- d[! `State Name` %in% c("Hawaii", "Puerto Rico", "Alaska")]
d <- d[`Parameter Code` == 88101]
library(sp) spD <- copy(d) coordinates(spD) <- ~ Longitude + Latitude
plot(spD)
library(ggmap, quietly = TRUE)
usa <- map_data("state")
ggplot() + geom_point(data = d, aes(Longitude, Latitude))
ggplot() +
geom_polygon(data=usa, aes(x = long, y = lat, group = group),
color = "grey", fill = "white") +
geom_point(data = d, aes(Longitude, Latitude))
ggplot() +
geom_polygon(data=usa, aes(x = long, y = lat, group = group),
color = "grey", fill = "white") +
geom_point(data = d, aes(Longitude, Latitude)) +
coord_map("polyconic")
ggplot() +
geom_polygon(data = usa, aes(x = long, y = lat, group = group),
color = "grey", fill = "white") +
geom_point(data = d, aes(Longitude, Latitude)) +
coord_map("polyconic", orientation = c(141, -74, 0))
EPSG (European Petroleum Survey Group): EPSG was a scientific organization with ties to the European petroleum industry consisting of specialists working in applied geodesy, surveying, and cartography related to oil exploration.
spD <- copy(d)
coordinates(spD) <- ~ Longitude + Latitude
proj4string(spD) <- CRS("+init=epsg:4326")
plot(spD)
spD2 <- spTransform(spD, CRS("+proj=longlat +datum=NAD83 +no_defs +ellps=GRS80 +towgs84=0,0,0"))
plot(spD2)
leaflet (and grammars again)set.seed(02138)
small_d <- dplyr::sample_n(d, size = 100)
small_d <- dplyr::mutate(small_d,
popup_text = paste0(round(`Arithmetic Mean`, 2)))
library(leaflet)
leaflet (and grammars again)leaflet(data = small_d) %>% addTiles() %>% # Add default OpenStreetMap map tiles addMarkers(~Longitude, ~Latitude, popup = ~popup_text)
leaflet (and grammars again)leaflet(data = d) %>% addTiles() %>% addMarkers(clusterOptions = markerClusterOptions() )
## Assuming 'Longitude' and 'Latitude' are longitude and latitude, respectively
googleVishttps://cran.r-project.org/web/packages/googleVis/vignettes/googleVis_examples.html
library(googleVis)
## ## Welcome to googleVis version 0.5.10 ## ## Please read the Google API Terms of Use ## before you start using the package: ## https://developers.google.com/terms/ ## ## Note, the plot method of googleVis will by default use ## the standard browser to display its output. ## ## See the googleVis package vignettes for more details, ## or visit http://github.com/mages/googleVis. ## ## To suppress this message use: ## suppressPackageStartupMessages(library(googleVis))
op <- options(gvis.plot.tag='chart')
googleVisD <- fread("annual_all_2015.csv")
small_d <- dplyr::mutate(small_d,
LatLong = paste(Latitude, Longitude, sep = ":"))
googleVisgvisGeoChart(small_d, "LatLong", options = list(region = "US"),
hovervar = "popup_text")
googleVisgvisMap(small_d, "LatLong", options = list(region = "US"),
tipvar = "popup_text")
ui.R
server.R
Then application is deployed
ui.RshinyUI(fluidPage(
leafletOutput('map1'),
actionButton('add', 'Add marker cluster'),
actionButton('clear', 'Clear marker cluster')
))
server.Rd <- fread("annual_all_2015.csv")
shinyServer(function(input, output) {
output$map1 = renderLeaflet({
leaflet() %>% addTiles() %>% setView(-97, 41, 4)
})
observeEvent(input$add, {
leafletProxy('map1') %>% addMarkers(
data = d,
popup = ~sprintf('PM 2.5 = %s', round(`Arithmetic Mean`, 2)), layerId = rownames(d),
clusterOptions = markerClusterOptions(), clusterId = 'cluster1'
)
})
observeEvent(input$clear, {
leafletProxy('map1') %>% clearMarkerClusters()
})
})